# coding=utf-8
# 这里的实现过程对算法进行了稍微的优化(运算结果应该稍快一些)：首先计算两个字符串开头以及结尾
# 的共同字符子串，然后去掉相同部分，重新构造新字符串，减少迭代过程次数。
# 如：string_a = "GGATCGA" string_b = "GAATTCAGTTA"构造的新字符串为：
# new_a = "AATTCAGTT" new_b = "GATCG"


def LD(string_a, string_b):
    diff_start = -1
    diff_end = -1
    len_a = len(string_a)
    len_b = len(string_b)
    short = min(len_a, len_b)

    # 寻找开头和结尾的共同字符串，并记录位置
    for i in range(0, short):
        if string_a[i] != string_b[i]:
            diff_start = i
            break
    if diff_start == -1:
        return abs(len_b - len_a)
    for i in range(0, short):
        if string_a[len_a - i -1] != string_b[len_b - i - 1]:
            diff_end = i
            break
    if diff_end == -1:
        return abs(len_b - len_a)

    # L(A+C, B+C) = LD(A, B)
    # 出去开头以及结尾相同的字符串，构建新的字符串
    long_str = unicode(string_a[diff_start: len_a - diff_end] if len_a >= len_b else string_b[diff_start: len_b - diff_end])
    short_str = unicode(string_a[diff_start: len_a - diff_end] if len_a < len_b else string_b[diff_start: len_b - diff_end])
    print long_str
    print short_str

    long_len = len(long_str)
    short_len = len(short_str)

    # store保存迭代过程中每次计算的结果
    store = range(0, long_len + 1)

    for i in range(short_len):
        temp = [i+1] * (long_len + 1)
        for j in range(long_len):
            if long_str[j] == short_str[i]:
                temp[j+1] = store[j]
            else:
                # 注意这时各个位置数据的对应关系
                temp[j+1] = min(store[j], store[j+1], temp[j]) + 1
        store = temp
        # print store
    # 最右下角即为编辑距离
    return store[-1]


string_a = "GGATCGA"
string_b = "GAATTCAGTTA"

ld = LD(string_a, string_b)
print ld